#!/usr/bin/env bash
#
# Launches a Hadoop Streaming MapReduce job whose mapper and reducer are the
# Python scripts map.py and reduce.py, taken from the current working
# directory and shipped with the job via -file.
#
# Usage: <this-script> [INPUT_HDFS_DIR [OUTPUT_HDFS_DIR]]
#   INPUT_HDFS_DIR   HDFS directory to read from  (default: /tmp/cctest)
#   OUTPUT_HDFS_DIR  HDFS directory to write to   (default: /tmp/ccout4)
#                    WARNING: it is deleted before the job starts.
#
# Exit status: that of the streaming job, or 1 if the output directory
# could not be removed beforehand.

# Treat references to unset variables as errors instead of empty strings.
set -u

# Locations of the hadoop launcher and of the streaming jar.
readonly HADOOP_CMD="/usr/bin/hadoop"
readonly STREAM_JAR_PATH="/opt/cloudera/parcels/CDH-5.0.5-1.cdh5.0.5.p0.8/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.3.0-mr1-cdh5.0.5.jar"

# HDFS input directory; first positional argument, falling back to the default.
readonly INPUT_FILE_PATH="${1:-/tmp/cctest}"
# HDFS output directory; second positional argument, falling back to the default.
readonly OUTPUT_PATH="${2:-/tmp/ccout4}"

# The output directory must not exist when the job starts, so remove it first.
# "-rm -r" replaces the deprecated "-rmr"; "-f" keeps a not-yet-existing
# directory (e.g. on the very first run) from being reported as an error.
if ! "${HADOOP_CMD}" fs -rm -r -f "${OUTPUT_PATH}"; then
  printf 'error: could not remove HDFS output directory %s\n' "${OUTPUT_PATH}" >&2
  exit 1
fi

# Run the streaming job. It is the last command, so its exit status becomes
# the exit status of this script.
"${HADOOP_CMD}" jar "${STREAM_JAR_PATH}" \
  -input "${INPUT_FILE_PATH}" \
  -output "${OUTPUT_PATH}" \
  -mapper "python map.py" \
  -reducer "python reduce.py" \
  -file ./map.py \
  -file ./reduce.py